#!/usr/bin/env python
# -*-coding=utf-8-*-

import numpy as np
from utils import data_utils
from tensorflow.contrib import learn

def papre_train_data(train_data_path, max_sent_length, num_classes, train_percent=0.9):
    """Load fastText-format training data and split it into train/dev sets.

    Texts are mapped to fixed-length id sequences via a VocabularyProcessor
    and labels are converted to arrays of `num_classes` width. The whole set
    is shuffled with a fixed seed so the split is reproducible.

    Args:
        train_data_path: path to the fastText-format training file.
        max_sent_length: maximum sentence length for the vocabulary processor.
        num_classes: number of label classes.
        train_percent: fraction of samples assigned to the training split.

    Returns:
        (x_train, y_train, x_dev, y_dev, vocab_processor)
    """
    print("Loading data...")
    texts, raw_labels = data_utils.load_fastext_train_data(train_data_path)

    vocab_processor = learn.preprocessing.VocabularyProcessor(max_sent_length)
    features = np.array(list(vocab_processor.fit_transform(texts)))
    labels = data_utils.label_to_array(raw_labels, num_classes)

    # Fixed seed -> the same shuffle (and therefore the same split) every run.
    np.random.seed(10)
    perm = np.random.permutation(np.arange(len(labels)))
    features = features[perm]
    labels = labels[perm]

    split_at = int(len(texts) * train_percent)
    x_train, x_dev = features[:split_at], features[split_at:]
    y_train, y_dev = labels[:split_at], labels[split_at:]
    return x_train, y_train, x_dev, y_dev, vocab_processor



def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Generate mini-batches over `data` for `num_epochs` epochs.

    Args:
        data: an indexable sequence of samples (converted to an ndarray).
        batch_size: maximum number of samples per batch; the last batch of
            an epoch may be smaller.
        num_epochs: how many full passes over the data to yield.
        shuffle: if True, reshuffle the data at the start of every epoch.

    Yields:
        np.ndarray slices of `data` of length <= batch_size.
    """
    data = np.array(data)
    data_size = len(data)
    # Ceil division. The previous `int(len(data)/batch_size) + 1` yielded an
    # extra EMPTY batch whenever data_size was an exact multiple of
    # batch_size (and one empty batch for empty data).
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size
    for epoch in range(num_epochs):
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]
